####
#### Purpose: Creating the data set to be analyzed. 
#### Author: Kelsey Shoub (kshoub@umass.edu)
#### Last Updated: Oct. 3, 2023
####

############## 1. Setting it up. ##############

# Opening up the necessary packages. If you have not installed these packages before, you will need to do so before running this script.

library(readr)
library(dplyr)

# Setting the working directory.
# The path below is the author's local copy of the replication files. It is
# only applied when that folder exists on the current machine; otherwise the
# script assumes it is already being run from the folder that contains "Data/".

replication.dir <- "~/Desktop/PolicingTStops/DrivingWhileDifferent/Replication Files"
if (dir.exists(replication.dir)) {
  setwd(replication.dir)
}

# Importing the raw data (one file per year, read relative to the working
# directory).
# Raw data download: https://openjustice.doj.ca.gov/data
# More info on RIPA: https://ripalog.com/ripa/ripa-faq.htm#faq2
ca.18 <- read_csv("Data/RIPA Stop Data 2018.csv")
ca.19 <- read_csv("Data/RIPA Stop Data 2019.csv")
ca.20 <- read_csv("Data/RIPA Stop Data 2020.csv")

############## 2. Cleaning the data: 2018, 2019, and 2020 ##############

# The three yearly files go through exactly the same cleaning steps, so the
# steps are written once as a function and applied to each year below.

# Returns TRUE where a 0/1 indicator equals 1 and FALSE otherwise. A missing
# value is treated as "not set" (FALSE) instead of propagating NA.
flag_set <- function(x) {
  !is.na(x) & x == 1
}

# Cleans one year of raw RIPA stop data.
#
# Argument:
#   stops - data frame with one year of raw records. It must contain the
#           ADS_*, BFS_*, and CED_NONE_CONTRABAND columns used below, plus
#           RAE_FULL, REASON_FOR_STOP, RFS_TRAFFIC_VIOLATION_TYPE,
#           K12_SCHOOL_GROUNDS, STOP_STUDENT, and CALL_FOR_SERVICE.
#
# Returns the data frame with the derived variables added, every column whose
# name starts with RFS_, BPS_, or TPS_ removed, and only the rows with
# CALL_FOR_SERVICE == 0 kept.
clean_ripa_year <- function(stops) {
  stops <- stops %>% mutate(
    # 1 if consent was requested to search the person or the property.
    ask_consent = ifelse(
      ADS_ASKED_SEARCH_PER == 1 | ADS_ASKED_SEARCH_PROP == 1, 1, 0
    ),
    # Consent per target: -1 = not asked, 1 = consent flag is 1, 0 = otherwise.
    give_consent_per = ifelse(
      ADS_ASKED_SEARCH_PER == 0, -1,
      ifelse(ADS_SEARCH_PERS_CONSEN == 1, 1, 0)
    ),
    give_consent_prop = ifelse(
      ADS_ASKED_SEARCH_PROP == 0, -1,
      ifelse(ADS_SEARCH_PROP_CONSEN == 1, 1, 0)
    ),
    # Combined consent: 1 if given for either target, -1 if asked for neither,
    # 0 otherwise.
    give_consent = ifelse(
      give_consent_per == 1 | give_consent_prop == 1, 1,
      ifelse(give_consent_per == -1 & give_consent_prop == -1, -1, 0)
    ),
    # 1 if the person or the property was searched.
    search_occur = ifelse(
      ADS_SEARCH_PERSON == 1 | ADS_SEARCH_PROPERTY == 1, 1, 0
    ),
    # Search category from the recorded bases for search (BFS_*). The first
    # matching category wins; a missing basis counts as not recorded.
    search_type = case_when(
      flag_set(BFS_OFFICER_SAFETY) |
        flag_set(BFS_SEARCH_WARRANT) |
        flag_set(BFS_INCIDENT) |
        flag_set(BFS_VEHICLE_INVENT) |
        flag_set(BFS_PAROLE) |
        flag_set(BFS_EXIGENT_CIRCUM) ~ "Mandatory",
      flag_set(BFS_SUSPECT_WEAPON) |
        flag_set(BFS_VISIBLE_CONTRABAND) |
        flag_set(BFS_ODOR_CONTRABAND) |
        flag_set(BFS_CANINE_DETECT) |
        flag_set(BFS_EVIDENCE) |
        flag_set(BFS_SCHOOL_POLICY) ~ "Discretionary: Probable Cause",
      flag_set(BFS_CONSENT_GIVEN) ~ "Discretionary: Consent Search",
      TRUE ~ "No Search"
    ),
    # Labelled sequence of events: was consent asked, was it given, and did a
    # search occur.
    search_sequence = ifelse(
      ask_consent == 1,
      ifelse(
        give_consent == 1,
        ifelse(search_occur == 1,
               "3 Consent Given, Search Occurred",
               "4 Consent Given, No Search"),
        ifelse(search_occur == 1,
               "1 No Consent Given, Search Occurred",
               "2 No Consent Given, No Search")
      ),
      ifelse(search_occur == 1, "5 Other Search", "0 No Search")
    ),
    # Numeric code of search_sequence. It is read from the leading digit of
    # the label so that a missing search_sequence stays missing rather than
    # falling through to 5.
    search_sequence2 = as.numeric(substr(search_sequence, 1, 1)),
    # 0 only when REASON_FOR_STOP == 1 and RFS_TRAFFIC_VIOLATION_TYPE == 1;
    # every other recorded combination is coded 1.
    # NOTE(review): presumably these codes mean "traffic violation" and
    # "moving violation" - confirm against the RIPA codebook.
    investigatory_stop = ifelse(
      REASON_FOR_STOP == 1,
      ifelse(RFS_TRAFFIC_VIOLATION_TYPE == 1, 0, 1),
      1
    ),
    # CED_NONE_CONTRABAND == 1 is coded as no contraband found.
    contraband_found = ifelse(CED_NONE_CONTRABAND == 1, 0, 1),
    # RAE_FULL with code 7 moved to 0; all other codes are unchanged.
    race_recode = ifelse(RAE_FULL == 7, 0, RAE_FULL),
    # 1 if the stop is flagged as school related by location, by student
    # status, or by a stop reason of 7 or 8.
    school_stop = ifelse(
      K12_SCHOOL_GROUNDS == 1 | STOP_STUDENT == 1 |
        REASON_FOR_STOP == 7 | REASON_FOR_STOP == 8, 1, 0
    )
  )

  # Number of bases for search recorded for each row (missing counts as 0).
  bfs.cols <- grepl("BFS_", colnames(stops), fixed = TRUE)
  stops$search_reason_total <- rowSums(stops[, bfs.cols, drop = FALSE],
                                       na.rm = TRUE)

  # Dropping a small number of columns to ease the computational burden.
  # A logical mask is used instead of negative positions so that nothing is
  # dropped by accident when no column name matches.
  drop.cols <- grepl("^(RFS|BPS|TPS)_", colnames(stops))
  stops <- stops[, !drop.cols, drop = FALSE]

  # Keeping only the rows with CALL_FOR_SERVICE == 0 (rows with a missing
  # value are dropped as well).
  # NOTE(review): presumably 0 marks stops that were not made in response to
  # a call for service, so the officer-initiated stops are the ones kept -
  # confirm against the RIPA codebook.
  stops %>% filter(CALL_FOR_SERVICE == 0)
}

# Cleaning each year with the shared routine.

ca.18 <- clean_ripa_year(ca.18)
ca.19 <- clean_ripa_year(ca.19)
ca.20 <- clean_ripa_year(ca.20)

############## 2d. Appending ##############

# Stacking the three cleaned yearly data sets into one data set.

ca <- bind_rows(ca.18, ca.19, ca.20)

# The by-year data sets are no longer used, so they are removed together.

rm(ca.18, ca.19, ca.20)

############## 3. Last Cleaning + Saving the Full File ##############

# Creating variables that indicate time.
# `date` is the part of DATE_OF_STOP before the first space and `hod` is the
# first two characters of TIME_OF_STOP. Both use vectorized string functions
# so the whole column is processed at once instead of row by row.
# An empty DATE_OF_STOP gives "" here; like NA, it parses to a missing date
# below.
ca$date <- sub(" .*", "", ca$DATE_OF_STOP)
ca$hod <- substr(ca$TIME_OF_STOP, 1, 2)

# Additional cleaning.
# NOTE(review): the "%b" month format and weekdays() depend on the session
# locale - confirm the script is run in an English locale.
ca <- ca %>%
  mutate(
    # Agency type from patterns in the agency name; the first match wins and
    # names matching none of the patterns (or missing) are "Missing".
    agency_type = case_when(
      grepl("CO S", AGENCY_NAME) ~ "Sheriff",
      grepl(" PD|LAPD", AGENCY_NAME) ~ "Police",
      grepl("CHP", AGENCY_NAME) ~ "SHP",
      TRUE ~ "Missing"
    ),
    # The stop date is parsed under two formats: day-month-year with an
    # abbreviated month name, and month/day/four-digit year.
    date2 = as.Date(DATE_OF_STOP, format = "%d-%b-%y"),
    date1 = as.Date(date, format = "%m/%d/%Y"),
    # date2 is preferred; date1 is used only where date2 could not be parsed.
    stop_date = coalesce(date2, date1),
    dow = weekdays(stop_date),
    midnight = ifelse(TIME_OF_STOP == "0000", 1, 0),
    disability = ifelse(PD_FULL == 0, 0, 1),
    # Year and month are stored as character strings.
    year = format(stop_date, "%Y"),
    month = format(stop_date, "%m")
  )

# Identifier built from the agency name and the DOJ record id, joined by "::".
ca$unique_id <- paste(ca$AGENCY_NAME, ca$DOJ_RECORD_ID, sep = "::")

# Keeping only the subset of variables that will be saved, in the order listed.
ca <- ca %>% select(all_of(c(
  # Identifiers and the reason for the stop.
  "DOJ_RECORD_ID", "PERSON_NUMBER", "unique_id",
  "REASON_FOR_STOP",
  # Raw search indicators.
  "ADS_SEARCH_PERSON", "ADS_SEARCH_PROPERTY",
  # Raw bases for search.
  "BFS_CONSENT_GIVEN", "BFS_OFFICER_SAFETY", "BFS_SEARCH_WARRANT",
  "BFS_PAROLE", "BFS_SUSPECT_WEAPON", "BFS_VISIBLE_CONTRABAND",
  "BFS_ODOR_CONTRABAND", "BFS_CANINE_DETECT", "BFS_EVIDENCE",
  "BFS_INCIDENT", "BFS_EXIGENT_CIRCUM", "BFS_VEHICLE_INVENT",
  "BFS_SCHOOL_POLICY",
  # Raw contraband / evidence indicators.
  "CED_NONE_CONTRABAND", "CED_FIREARM", "CED_AMMUNITION", "CED_MONEY",
  "CED_DRUG_PARAPHERNALIA", "CED_STOLEN_PROP", "CED_ELECT_DEVICE",
  "CED_WEAPON", "CED_DRUGS", "CED_ALCOHOL", "CED_OTHER_CONTRABAND",
  # Derived search indicator and characteristics of the person stopped.
  "search_occur", "race_recode", "G_FULL", "AGE",
  "LIMITED_ENGLISH_FLUENCY", "disability", "LGBT",
  # Stop type, timing, and agency.
  "investigatory_stop", "dow", "hod", "month", "year", "AGENCY_NAME",
  # Derived search and consent variables.
  "search_type", "ask_consent", "give_consent", "agency_type",
  "school_stop", "search_sequence", "contraband_found"
)))

# Writing the final data set to disk.
save(ca, file = "Data/CA.RData")
